<--- %%NOBANNER%% --> phlev.sas
 BackForward
/*------------------<--- Start of Description -->--------------------\
| Returns the leverage residuals matrix L for a proportional hazards |
| model.  The returned residuals data set is n by p, where n is the  |
| number of (non-missing) subjects used in the PHREG fit, and p is   |
| the number of fitted covariates.  The leverage residuals are an    |
| approximation to the jackknife; the i-th observation of the dataset|
| contains the approximate change in beta if observation i were      |
| dropped from the model.  These residuals will normally be plotted  |
| as an aid to finding influential or "high leverage" observations.  |
| Some data sets, particularily those with recurring events, may     |
| contain multiple observations for a single individual. The macro   |
| has the option of "collapsing" the matrix of leverage residuals    |
| so that the output has only one observation per unique individual. |
| The procedure can also estimate the robust covariance estimate     |
| L'L.  When there are multiple, possibly correlated, observations   |
| per subject this option combined with the collapsing mentioned     |
| above, gives an estimate of variance that is corrected for the     |
| correlation.                                                       |
|--------------------<--- End of Description -->---------------------|
|--------------------------------------------------------------------|
|--------------<--- Start of Files or Arguments Needed -->-----------|
| Arguments:                                                         |
|   - Required:                                                      |
|     data=input SAS data set name                                   |
|     time=time variable for survival.                               |
|          This may be a single variable or an interval expressed    |
|          as (t1,t2).                                               |
|     event=event variable for survival, 1=event, 0=censored         |
|     xvars=list of covariates for Cox model                         |
|   - Optional:                                                      |
|     strata=stratification variable(one only) for stratifed Cox     |
|            models.                                                 |
|     id=name of id variable to be included in the output dataset.   |
|            It must be numeric.                                     |
|     collapse= Y,N,T,F.  If yes(Y,T), then the residual matrix is   |
|            collapsed (summed) based on unique values of the id     |
|            variable. Default is  not to collapse(N).               |
|     outlev= name of the data set containing the leverage residuals.|
|            The variables names will be the same as the covariate   |
|            names in the Cox model. The data set will also contain  |
|            the ID variable, if one was specified. Default data set |
|            name is "phlev".                                        |
|     outvar = name of the output data set containing the robust     |
|            variance estimate.  There is one observation and one    |
|            variable for each covariate. Default data set name is   |
|            "phvar".                                                |
|     plot = Y,N,T,F. If yes(Y,T), a plot of residuals for each      |
|            observation or ID value(if collapse=Y) is               |
|            produced.  A separate plot is created for each          |
|            covariate.  Default is N.                               |
|     scaled = Y,N,T,F. If yes(Y,T), the scaled score residuals      |
|            are plotted.  If N(default), the raw residuals          |
|            are used.                                               |
|     ref = Y,N,T,F. If yes(Y,T), reference lines are drawn on the   |
|            plots at +-se(beta).  Default is N.                     |
|     print = Y,N.  If N, the phreg printout is suppressed.          |
|            Default is N                                            |
|     showlog = Y,N.  If N, the SAS log is suppressed.               |
|            Default is N.                                           |
|     ties = Breslow,Discrete,Efron,Exact. Method to use for handling|
|            failure time ties.  Default is Breslow.                 |
| Output: The macro prints the PHREG output used to fit the Cox model|
|     and a summary table including the Cox model betas, SE and chi- |
|     square along with the robust SE and its associated chi-square. |
|     It also includes the global Wald chi-square test based on the  |
|     leverage residuals.  The summary table is available as a SAS   |
|     data set named _b_se.                                          |
|---------------<--- End of Files or Arguments Needed -->------------|
|--------------------------------------------------------------------|
|----------------<--- Start of Example and Usage -->-----------------|
| Example:                                                           |
|  %LET XX= RX1 RX2 RX3 RX4 NUMBER1 NUMBER2 NUMBER3 NUMBER4 SIZE1    |
|         SIZE2 SIZE3 SIZE4;                                         |
|  %PHLEV(DATA=Bladder,TIME=time,EVENT=stat,                         |
|         XVARS= &XX, STRATA=enum, ID=id, COLLAPSE=Y);               |
| Usage: %phlev(data=,time=,event=,xvars=,strata=, id=, collapse=N,  |
|              outlev=phlev, outvar=phvar,plot=N, scaled=N, ref=N,   |
|               print=N, showlog=N, ties=breslow);                   |
| References:                                                        |
|   The leverage residuals are discussed in Cain and Lange,          |
|   Biometrics 40:, 493-9 (1984).                                    |
|   The robust variance estimate L'L is derived in Lin and Wei,      |
|   JASA 84: 725-8 (1989).                                           |
|   Extension of this to correlated data, using the collapsed        |
|   leverage matrix is developed in Wei, Lin, and Weissfeld, JASA 84:|
|   1065-83, amoung others.                                          |
|   An overview of the methods is found in Mayo Clinic Tech Report58.|
\-------------------<--- End of Example and Usage -->---------------*/
 %macro phlev(data=,time=,event=,xvars=,
              strata=, id=, collapse=N, outlev=phlev, outvar=phvar,
              plot=N, scaled=N, ref=N, print=N, showlog=N, ties=breslow);
/*--------------------------------------------\
| Author:  E. Bergstralh, J. Kosanke and      |
|          T. Therneau                        |
| Created: September 8, 1993;                 |
| Purpose: return leverage residual matrix for|
|          proportional hazard model;         |
\--------------------------------------------*/
 %if %upcase(&showlog)=N %then %do;
    %if &sysscpl=Solaris %then %do;
       proc printto log='/dev/null';
    %end;  
    %if &sysscpl=MVS %then %do;
       filename dummy '&&temp' disp=(new,delete,delete);
       proc printto log=dummy;
    %end;
 %end;
%local t paren timeint timeint1 timeint2 i nxs sp;
%if %upcase(&collapse)=T %then %let collapse=Y;
%if %upcase(&collapse)=F %then %let collapse=N;
%if %upcase(&plot)=T %then %let plot=Y;
%if %upcase(&plot)=F %then %let plot=N;
%if %upcase(&scaled)=T %then %let scaled=Y;
%if %upcase(&scaled)=F %then %let scaled=N;
%if %upcase(&ref)=T %then %let ref=Y;
%if %upcase(&ref)=F %then %let ref=N;
%let paren=%str(%();
%if %index(&time,&paren)>0 %then %do;
   %let timeint=1;
   %let timeint1=%qscan(&time,1);
   %let timeint2=%qscan(&time,2);
%end;
%else %let timeint=0;
footnote"phlev macro: event=&event time=&time strata=&strata id=&id collapse=
&collapse ties=&ties";
footnote2"Xvars= &xvars";
 %let nxs=0;             %**count the number of x vars;
 %do i=1 %to 50;
   %if %scan(&xvars,&i)= %then %goto done;
   %let nxs=%eval(&nxs+1);
 %end;
 %done: %put &nxs;
 %macro xlist(prefix);    %**set-up var list code;
  %local j;
  %do j=1 %to &nxs; &prefix&j %end;
 %mend xlist;
data _setup; set &data;         **delete obs with missing values;
  keep &id
  %if &timeint=0 %then %do;
     &time
  %end;
  %if &timeint=1 %then %do;
     &timeint1 &timeint2
  %end;
       &event &strata &xvars;
  xmiss=0;
  %do i=1 %to &nxs;
  xx=%scan(&xvars,&i);
  if xx=. then xmiss=1;
  %end;
  if &event=.
  %if &timeint=0 %then %do;
     or &time=.
  %end;
  %if &timeint=1 %then %do;
     or &timeint1=. or &timeint2=.
  %end;
  or xmiss=1 then delete;
                                ** run phreg and grab output datasets;
proc phreg data=_setup noprint;
 model &time*&event(0)= &xvars / maxiter=0 ties=&ties;
 %if &strata ^= %then %do; strata &strata; %end;
 output out=_out1(keep=&id dfb1-dfb&nxs) dfbeta=dfb1-dfb&nxs;
 id &id;
 %if %upcase(&collapse)=Y %then %do;
proc sort data=_out1 ; by &id;        **collapse the dataset..sum within id;
proc means noprint; by &id;       var dfb1-dfb&nxs;
 output out=_out1  sum=dfb1-dfb&nxs;
run;
 %end;
proc means noprint;               var dfb1-dfb&nxs;
 output out=_out2  sum=sum1-sum&nxs;
run;
proc phreg data=_setup covout outest=_est
 %if %upcase(&print)=N %then %do;
    noprint
 %end;
 ;
 model &time*&event(0)= &xvars / ties=&ties;
 %if &strata ^= %then %do; strata &strata; %end;
 output out=_sch ressco=s1-s&nxs;
 id &id;
/* Check for bad model */
data _null_; set _est(where=(_type_='PARMS'));
   bad=0;
   %do i=1 %to &nxs;
      if %scan(&xvars,&i)=0 then bad=1;
   %end;
   call symput('getout',bad);
run;
%if &getout=1 %then %do;
   data _null_;
    file print;
      put 'Singular model - no robust solution produced';
      put '%phlev will stop processing';
   run;
%end;
%else %do;
   data _sch5 ; set _sch(keep=&id &strata
     %if &timeint=0 %then %do;
        &time
     %end;
     %if &timeint=1 %then %do;
        &timeint1 &timeint2
     %end;
    &event s1-s&nxs);
     keep &strata &id
     %if &timeint=0 %then %do;
        &time
     %end;
     %if &timeint=1 %then %do;
        &timeint1 &timeint2
     %end;
          &event &xvars;
     label
     %if &timeint=0 %then %do;
        &time=
     %end;
     %if &timeint=1 %then %do;
        &timeint1=
        &timeint2=
     %end;
          &event=  ;
     %do i=1 %to &nxs;            **rename scor_i  vars to xvars;
      %scan(&xvars,&i) =s&i;
     %end;
    *proc print data=_sch5 ;
    *title3'Crude score residuals';
   run;
    %if %upcase(&collapse)=Y %then %do;
   proc sort data=_sch5 ; by &id;        **collapse the dataset..sum within id;
   proc means noprint; by &id;       var &xvars;
    output out=_sch5  sum=&xvars;
    *title3"Crude score residuals..collapsed(summed) over &id";
    *proc print data=_sch5 ;
   run;
    %end;
   proc iml;                      **Invoke IML to get L..scaled residuals;
    use _sch5  ; setin _sch5  ;
    read all var{ &xvars      } into r;
      %if &id ^= %then %do;
    read all var{&id } into id_time;
      %end;
    use _est; setin _est;
    read all var{&xvars     } into covb where(_type_="COV  ");
    read var{&xvars     } into beta  where(_type_="PARMS");
    L=r*covb;                     **Scaled score residuals;
    LTL=L`*L;                     **Robust var matrix;
    se_cox=sqrt(vecdiag(covb));   **SE from Cox model;
    se_scr=sqrt(vecdiag(LTL ));   **SE based on L'*L;
    chisqcox=(beta` / se_cox)##2; **Chi-square Cox model;
    chisqrob=(beta` / se_scr)##2; **Chi-square based on L'*L;
    wald_x2=beta*ginv(LTL)*beta`; **Global Wald chi-square..robust;
    eigv=eigval(LTL);
    df=sum(eigv > .000000000001);        **Wald degrees freedom;
    p_wald= 1 - probchi(wald_x2,df); **Wald p-value;
    idl= %if &id ^= %then %do; id_time || %end;  L;
    b_se= beta` || se_cox || se_scr || chisqcox || chisqrob;
    wald= wald_x2 || df || p_wald;
    use _out1; setin _out1;
    read all var{%do i=1 %to &nxs; dfb&i %end; } into dzero;
    use _out2; setin _out2;
    read all var{%do i=1 %to &nxs; sum&i %end; } into temp;
    roblr=temp*inv(dzero`*dzero)*temp`;
    p_lr=1-probchi(roblr,df);
    rob_lr=roblr || df || p_lr;
    bl_1x5=j(1,5,.);
    bl_px3=j(&nxs,3,.);
    table= (b_se || bl_px3) // ( bl_1x5 || wald ) // ( bl_1x5 || rob_lr );
    %let sp=%str( );
    varn    ={ %if &id^= %then %do; "&id" %end;
               %do i=1 %to &nxs;
                 %let t= "%scan(&xvars,&i)" &sp ; &t
               %end;
              };
    xnames={
               %do i=1 %to &nxs;
                 %let t= "%scan(&xvars,&i)" &sp ; &t
               %end;
              };
    xname ={
               %do i=1 %to &nxs;
                 %let t= "%scan(&xvars,&i)" &sp ; &t
               %end;
              "wald" "robust score"
              };
    sen={ "b_cox" "se_cox" "se_robst" "chi_cox" "chi_rob"
          "chi_w_lr" "df" "p_w_lr" };
    create &outlev from  idl[colname=varn    ];
    append from idl;
    create &outvar  from  ltl[rowname=xnames colname=xnames];
    append from ltl[rowname=xnames];
    create _b_se     from  table[ rowname=xname  colname=sen   ];
    append from table[rowname=xname ];
    *show datasets;
    *show contents;
    quit;                        ** quit IML;
    title4"Comparison of Cox model Beta, SE and chi-square to robust estimates";
    title5"Wald chi-square is based on the robust estimates";
    %if %upcase(&collapse)=Y %then %do;
    title6"Robust SE is based on the collapsed(summed within &id) L matrix";
    %end;
    data _b_se; set _b_se;
       if _n_<=&nxs then p=1-probchi(chi_rob,1);
       if _n_>=&nxs+1 then p=p_w_lr;
     label xname='Variable'
           b_cox='Parameter/Estimate'
           se_cox='SE'
           se_robst='Robust/SE'
           chi_cox='Chi-Square'
           chi_rob='Robust/Chi-Square'
           chi_w_lr='Chi-Square';
       %if %upcase(&scaled)=Y %then %do i=1 %to &nxs;
          if _n_=&i then do;
             call symput("pvref&i",se_robst);
             call symput("nvref&i",-1*se_robst);
          end;
       %end;
    proc print data=_b_se label split='/';
    id xname;
    var b_cox se_cox se_robst chi_cox chi_rob chi_w_lr df p;
    format chi_cox chi_rob chi_w_lr 7.3 p 6.4;
    run;
    footnote; symbol; title4;
    %if %upcase(&plot)=Y %then %do;
       %if %upcase(&scaled)=N %then %do;
          %if &id= %then %do;
             data _sch5; set _sch5;
                obs=_n_;
             %let id=obs;
          %end;
          proc plot data=_sch5;
           plot (&xvars)*&id;
          title4 'Plot of Raw Residuals';
       %end;
       %if %upcase(&scaled)=Y %then %do;
          %if &id= %then %do;
             data &outlev; set &outlev;
                obs=_n_;
             %let id=obs;
          %end;
          proc plot data=&outlev;
          %do i=1 %to &nxs;
             %let xvble=%scan(&xvars,&i);
             plot &xvble*&id
             %if %upcase(&ref)=Y %then %do;
                /vref=&&pvref&i &&nvref&i
             %end;
             ;
          %end;
          title4 'Plot of Scaled Residuals';
       %end;
    %end;
    proc datasets nolist;
      delete _setup _est _sch _sch5 _out1 _out2;
    run;
    quit;
%end;
%if %upcase(&showlog)=N %then %do;
   proc printto; run;
%end;
 %mend phlev ;